***********************************************
* REPLICATION PROGRAM FOR Evaluating the effects of geographic adjustments on poverty measures using self-reported financial well-being 
* Jeff Larrimore
* July 2024
***********************************************

*Note: The file NFCS_zip.dta referenced in this program contains the zip codes of FINRA survey resondents.
*Under the terms of our data agreement, this file cannot be posted publicly in the replication dataset but it can be obtained by contacting the FINRA foundation 

cd "[your directory here]"

********************************************************************
********************************************************************
* STEP 0: NFCS data preparation
********************************************************************
********************************************************************

**** SET UP THE 2018 NFCS DATA

import delimited "NFCS_2018_State_Data_190603.csv", clear

gen fwb1_exp = .
gen fwb2_getby = j41_2
gen fwb3_secure = .
gen fwb4_concern = j41_3
gen fwb5_never = j41_1
gen fwb6_enjoy = .
gen fwb7_behind = .
gen fwb8_control = j42_2
gen fwb9_strain = .
gen fwb10_left = j42_1

*Recode NIU/refused to missing for the scale
replace fwb2 = . if fwb2>90
replace fwb4 = . if fwb4>90
replace fwb5 = . if fwb5>90
replace fwb8 = . if fwb8>90
replace fwb10 = . if fwb10>90

*Recode all values to be 0-4 instead of 1-5
replace fwb2 = fwb2 - 1
replace fwb4 = fwb4 - 1
replace fwb5 = fwb5 - 1
replace fwb8 = fwb8 - 1
replace fwb10 = fwb10 - 1

*Flip the order so that higher values are more positive
foreach v in fwb2_getby fwb4_concern fwb5_never fwb8_control {
      replace `v'=(4-`v')
}

*Set the mode of taking the survey
gen self = 1
*set the age of respondents *** NOTE THAT ages 55 to 64 are included as 18 to 61 since the public data does not have ages 61-64.
gen age18_61 = 0
replace age18_61 = 1 if a3ar_w<=5

pfwb FWB, replace missok

save "NFCS_Microdata_with_FWB", replace

**** SET UP THE 2021 NFCS DATA

use "NFCS 2021 State Data 220627.dta", clear

rename *, lower

gen fwb1_exp = .
gen fwb2_getby = j41_2
gen fwb3_secure = .
gen fwb4_concern = j41_3
gen fwb5_never = j41_1
gen fwb6_enjoy = .
gen fwb7_behind = .
gen fwb8_control = j42_2
gen fwb9_strain = .
gen fwb10_left = j42_1

*Recode NIU/refused to missing for the scale
replace fwb2 = . if fwb2>90
replace fwb4 = . if fwb4>90
replace fwb5 = . if fwb5>90
replace fwb8 = . if fwb8>90
replace fwb10 = . if fwb10>90

*Recode all values to be 0-4 instead of 1-5
replace fwb2 = fwb2 - 1
replace fwb4 = fwb4 - 1
replace fwb5 = fwb5 - 1
replace fwb8 = fwb8 - 1
replace fwb10 = fwb10 - 1

*Flip the order so that higher values are more positive
foreach v in fwb2_getby fwb4_concern fwb5_never fwb8_control {
      replace `v'=(4-`v')
}

*Set the mode of taking the survey
gen self = 1
*set the age of respondents *** NOTE THAT ages 55 to 64 are included as 18 to 61 since the public data does not have ages 61-64.
gen age18_61 = 0
replace age18_61 = 1 if a3ar_w<=5

pfwb FWB, replace missok

gen year = 2021
rename a8_2021 a8

save "NFCS_Microdata_with_FWB_2021", replace

********************************************************************
********************************************************************
* STEP 1: SET UP CPS DATA
********************************************************************
********************************************************************

	use "cps_00078.dta", clear

	*Restrict to adults (matching FINRA and SHED)
	keep if age>=18

	gen surveyyear = year
	replace year = year - 1

	gen msastatus = 0 if metro==1
	*calling "not identifiable" (about 1% of sample) metro since most people are metro
	replace msastatus = 1 if metro>=2 | metro==0

	*Replace missing values with zero
	replace inctot = 0 if inctot==99999999
	replace inctot = 0 if inctot==99999998

*********
* DEFINE FAMILY UNITS TO MATCH OPM AND SPM DEFINITIONS
*********

	*Create the family unit for the official poverty measure by including related subfamilies with the primary family 
	gen opmfamunit = famid
	replace opmfamunit = 1 if ftype==3 
	sort year serial opmfamunit
	by year serial opmfamunit: gen opmsize = _N
	
*********
* GENERATE SPM POVERTY RATIOS USING DIFFERENT THRESHOLD DEFINITIONS
* These are used to rank-order individuals when anchoring the poverty definitions
*********

	*Create the base threshold by ownership status (matches published base threshold values)
	gen spm_basethres = spmthres / spmgeoadj
	*Original SPM ratio ratio
	gen ratio_spm = spmtotres/spmthresh
	*Remove geographic adjustment from threshold
	gen ratio_spm_nogeog = spmtotres/spm_basethres
	*Switch to the OPM threshold - matches the official poverty rate
	gen ratio_off = offtotval/offcutoff
	
*********
* GENERATE POVERTY FLAGS (UNANCHORED) 
*********
	
	gen pov_spm = 0
	gen pov_spm_nogeog = 0
	replace pov_spm = 1 if ratio_spm<1
	replace pov_spm_nogeog = 1 if ratio_spm_nogeog<1

	replace offpov = 0 if offpov!=1
	gen pov_off = offpov

*********
* GENERATE ANCHORED POVERTY FLAGS FOR ALL INCOME DEFINITIONS
*********
	
	*Rank-order people using each income definition
	set seed 29815771
	gen rand = runiform()
	sort year
	by year: egen double totalwt = total(asecwt)

	foreach var in spm spm_nogeog off {
		sort year ratio_`var' rand
		by year: gen double temp_`var' = sum(asecwt)
		gen rank_`var' = temp_`var'/totalwt
	}
		
	drop temp_*

	* CONSTRUCT CONSISTENT SPM-ANCHORED POVERTY FLAGS
	foreach var in spm spm_nogeog off {
		gen pov_anc_`var' = 0
	}
	forval yr = 2017/2022{
		sum pov_spm if year==`yr' [aw=asecwt]
		global povrate = r(mean)
		foreach inc in spm spm_nogeog off {
			replace pov_anc_`inc' = 1 if rank_`inc'<=$povrate & year==`yr'
		}
	}

***********************************************
* CREATE THE FILE OF STATE POVERTY RATES FOR COMPARISON TO CFPB SCORES
***********************************************

*Restrict to adults to match the CFPB score approach
keep if age>=18

gen n = 1

gen o64 = 0
replace o64 = 1 if age>=65

preserve
	collapse (mean) pov_* ratio_spm spmgeoadj (sum) n [iw=asecwt], by(year statefip o64)
	save "pov_measures_by_state_o64", replace
restore

***********************************************
* DETERMINE METRO (CBSA) FIPS
***********************************************

preserve
	collapse (mean) spmgeoadj metarea (sum) n [iw=asecwt], by(year statefip metfips o64)
	rename spmgeoadj spmgeoadj_metro
	rename metfips metfips_numeric
	gen str5 metfips = string(metfips_numeric,"%05.0f")
	*Including missing with nonmetro and code as 99999 to match the crosswalk file
	replace metfips = "99999" if metfips=="99998"
	save "pov_measures_by_metro_o64", replace
restore

********************************************************************
********************************************************************
* STEP 2: SET UP THE ZIP-CBSA CROSSAWALK FILE (NFCS USES ZIP CODE)
********************************************************************
********************************************************************
              
	import excel "ZIP_CBSA_122021.xlsx", sheet("SQLT0001") firstrow allstring clear
	destring res_ratio, replace force
	sort zip res_ratio
	by zip: keep if _n==_N
	save "ZIP_CBSA_122021.dta", replace

********************************************************************
********************************************************************
* STEP 3: MERGE CENSUS AND NFCS DATA - CREATE RESULTS
********************************************************************
********************************************************************

	use "NFCS_Microdata_with_FWB", clear
	keep FWB a3ar_w a8 a11 a4a_new_w stateq wgt_n2 wgt_s3 a5_2015 nfcsid ea_1 e7
	gen year = 2018
	destring e7, replace force
	append using "NFCS_Microdata_with_FWB_2021", nolabel
	replace year = 2021 if year==.
	keep year FWB a3ar_w a8 a11 a4a_new_w stateq wgt_n2 wgt_s3 a5_2015 nfcsid ea_1 e7
	joinby nfcsid using "NFCS_zip.dta", unmatched(master)
	drop _merge
	rename A2 zip
	destring e7, replace force
	count
	joinby zip using "ZIP_CBSA_122021.dta", unmatched(master)
	rename cbsa metfips
	replace metfips = "99999" if metfips==""

	gen o64 = 0
	replace o64 = 1 if a3ar_w==6

	gen own_mortgage = .
	replace own_mortgage = 0 if ea_1==2 // rent
	replace own_mortgage = 1 if ea_1==1 & e7==2  // own no mortgage
	replace own_mortgage = 2 if ea_1==1 & e7==1  // own no mortgage

	gen inc_cat = a8

	gen parent = 0
	replace parent = 1 if a11<=4

	gen race_simple = a4a_new_w

	gen statefip = .
	replace statefip = 1 if stateq==1
	replace statefip = 2 if stateq==2
	replace statefip = 4 if stateq==3
	replace statefip = 5 if stateq==4
	replace statefip = 6 if stateq==5
	replace statefip = 8 if stateq==6
	replace statefip = 9 if stateq==7
	replace statefip = 10 if stateq==8
	replace statefip = 11 if stateq==9
	replace statefip = 12 if stateq==10
	replace statefip = 13 if stateq==11
	replace statefip = 15 if stateq==12
	replace statefip = 16 if stateq==13
	replace statefip = 17 if stateq==14
	replace statefip = 18 if stateq==15
	replace statefip = 19 if stateq==16
	replace statefip = 20 if stateq==17
	replace statefip = 21 if stateq==18
	replace statefip = 22 if stateq==19
	replace statefip = 23 if stateq==20
	replace statefip = 24 if stateq==21
	replace statefip = 25 if stateq==22
	replace statefip = 26 if stateq==23
	replace statefip = 27 if stateq==24
	replace statefip = 28 if stateq==25
	replace statefip = 29 if stateq==26
	replace statefip = 30 if stateq==27
	replace statefip = 31 if stateq==28
	replace statefip = 32 if stateq==29
	replace statefip = 33 if stateq==30
	replace statefip = 34 if stateq==31
	replace statefip = 35 if stateq==32
	replace statefip = 36 if stateq==33
	replace statefip = 37 if stateq==34
	replace statefip = 38 if stateq==35
	replace statefip = 39 if stateq==36
	replace statefip = 40 if stateq==37
	replace statefip = 41 if stateq==38
	replace statefip = 42 if stateq==39
	replace statefip = 44 if stateq==40
	replace statefip = 45 if stateq==41
	replace statefip = 46 if stateq==42
	replace statefip = 47 if stateq==43
	replace statefip = 48 if stateq==44
	replace statefip = 49 if stateq==45
	replace statefip = 50 if stateq==46
	replace statefip = 51 if stateq==47
	replace statefip = 53 if stateq==48
	replace statefip = 54 if stateq==49
	replace statefip = 55 if stateq==50
	replace statefip = 56 if stateq==51

	***Merge in the Census poverty measures
	*At the state level
	count
	capture drop _merge
	joinby year statefip o64 using "pov_measures_by_state_o64", unmatched(master)
	tab _merge
	count
	*At the CBSA metro level
	joinby year statefip metfips o64 using "pov_measures_by_metro_o64", unmatched(master) _merge(metro_merge_orig)
	tab metro_merge_orig
	*recode non-found to rest of state (this appears to match how Census places them) then remerge
	gen metfips_orig = metfips
	replace metfips = "99999" if metro_merge==1
	drop _merge
	joinby year statefip metfips o64 using "pov_measures_by_metro_o64", unmatched(master) update

	gen fwb_very_low = (FWB<30)
	gen fwb_low = (FWB<38)

*CREATE FIGURE 1 OUTPUT
*Note that all of the CPS variables are the same within a state so weight doesn't matter for them. But need wgt_n2 for the fwb scores
preserve
	gen finra_wgt = 1
	collapse (mean) pov_* ratio_spm spmgeoadj fwb_very_low fwb_low n (sum) finra_wgt [iw=wgt_n2], by(year statefip o64)
	keep if year==2018 | year==2021
	sort o64 year statefip 
	save "Figure1_output", replace
restore

*collapsing the top two income groups in 2021	
replace a8 = 9 if a8==10

*TABLE A1
reg fwb_very_low pov_off [aw=wgt_n2] if o64==0
reg fwb_very_low pov_anc_off [aw=wgt_n2] if o64==0
reg fwb_very_low pov_anc_spm_nogeog [aw=wgt_n2] if o64==0
reg fwb_very_low pov_spm [aw=wgt_n2] if o64==0

reg fwb_very_low pov_off parent race_simple i.year i.a3ar_w i.a5 [aw=wgt_n2] if o64==0 
reg fwb_very_low pov_anc_off parent race_simple i.year i.a3ar_w i.a5 [aw=wgt_n2] if o64==0 
reg fwb_very_low pov_spm_nogeog parent race_simple i.year i.a3ar_w i.a5 [aw=wgt_n2] if o64==0 
reg fwb_very_low pov_spm parent race_simple i.year i.a3ar_w i.a5 [aw=wgt_n2] if o64==0 

*For footnote (looking at low or very low)
reg fwb_low pov_off [aw=wgt_n2] if o64==0
reg fwb_low pov_spm [aw=wgt_n2] if o64==0

*NOT SHOWN IN PAPER: REPLICATION OF TABLE 1 FOR OLDER ADULTS
reg fwb_very_low pov_off [aw=wgt_n2] if o64==1
reg fwb_very_low pov_anc_off [aw=wgt_n2] if o64==1
reg fwb_very_low pov_spm_nogeog [aw=wgt_n2] if o64==1
reg fwb_very_low pov_spm [aw=wgt_n2] if o64==1

*TABLE A2 
reg fwb_very_low spmgeoadj_metro [aw=wgt_n2] if o64==0
reg fwb_very_low spmgeoadj_metro parent race_simple i.year i.a3ar_w i.a5 [aw=wgt_n2] if o64==0
reg fwb_very_low spmgeoadj_metro parent race_simple i.year i.a3ar_w i.a5 i.a8 [aw=wgt_n2] if o64==0

*TABLE A2 ROBUSTNESS (state-level results)
reg fwb_very_low spmgeoadj [aw=wgt_n2] if o64==0
reg fwb_very_low spmgeoadj parent race_simple i.year i.a3ar_w i.a5 [aw=wgt_n2] if o64==0
reg fwb_very_low spmgeoadj parent race_simple i.year i.a3ar_w i.a5 i.a8 [aw=wgt_n2] if o64==0

*TABLE A3 
reg FWB spmgeoadj_metro [aw=wgt_n2] if o64==0
reg FWB spmgeoadj_metro parent race_simple i.year i.a3ar_w i.a5 [aw=wgt_n2] if o64==0
reg FWB spmgeoadj_metro parent race_simple i.year i.a3ar_w i.a5 i.a8 [aw=wgt_n2] if o64==0

*FIGURE 2 - CBSA
capture program drop interaction_se
program define interaction_se, rclass
	reg FWB parent race_simple i.year i.a3ar_w i.a5 c.spmgeoadj_metro##a8 [aw=wgt_n2] if o64==0
	capture drop b_0
	capture drop b_1 
	gen b_0 = _b[spmgeoadj]
	forval i = 2/9 {
		capture drop b_`i'
		gen b_`i' = _b[`i'.a8#c.spmgeoadj]
	}
	capture drop sum_b1
	gen sum_b1 = b_0 + 0
	sum sum_b1
	return scalar x1 = r(mean)
	forval i = 2/9 {
		capture drop sum_b`i'
		gen sum_b`i' = b_0 + b_`i'
		sum sum_b`i'
		return scalar x`i' = r(mean)
	}
end
*bootstrap r(x1 x2 x3 x4 x5 x6 x7 x8 x9 x10), reps(50): interaction_se
bootstrap r(x1) r(x2) r(x3) r(x4) r(x5) r(x6) r(x7) r(x8) r(x9), reps(100): interaction_se

********************************************************************
********************************************************************
* SHED RESULTS
********************************************************************
********************************************************************
use "publicSHED2021.dta", clear
gen year = 2021
append using "publicSHED2020.dta", force
replace year = 2020 if year==.
append using "publicSHED2019.dta", force
replace year = 2019 if year==.
append using "publicSHED2018.dta", force
replace weight = weight2 if weight==.
replace year = 2018 if year==.
append using "publicSHED2017.dta", force
replace year = 2017 if year==.
*In the public data for 2017 there are only population weights - convert to sample weight
sum weight3b if year==2017
replace weight = weight3b/r(mean) if weight==. & year==2017

capture gen race_4cat = .
replace race_4 = 1 if ppethm==1 & race_4==.
replace race_4 = 2 if ppethm==2 & race_4==.
replace race_4 = 3 if ppethm==4 & race_4==.
replace race_4 = 4 if (ppethm==3 | ppethm==5) & race_4==.

*Drop the small 2017 spanish language sample
drop if weight5!=. & weight==. 
capture drop educ_4cat
*code educ_4cat to match the SHED report
capture gen educ_4cat = .
replace educ_4cat = 1 if (ED0==1 | ED0==2) & educ_4cat==.
replace educ_4cat = 2 if (ED0==3 | ED0==4) & educ_4cat==.
replace educ_4cat = 3 if (ED0==5) & educ_4cat==.
replace educ_4cat = 4 if (ED0>=6 & ED0<=9) & educ_4cat==.

keep ppage I40 L0_b B2 weight ppstate year race_4cat ppagecat B0_a B0_b B0_c B1_a B1_b atleast_okay ED0 educ_4cat ppethm
joinby ppstaten using "state_fips_list.dta", unmatched(master)
tab _merge
drop _merge

gen o64 = (ppage>=65)
gen parent = (L0_b==1)

joinby year statefip o64 using "pov_measures_by_state_o64", unmatched(master)

drop if B2==. | B2==-1
gen B2_very_low = (B2==1)

*Table A4
reg B2_very_low pov_off [aw=weight] if o64==0
reg B2_very_low pov_anc_off [aw=weight] if o64==0
reg B2_very_low pov_spm_nogeog [aw=weight] if o64==0
reg B2_very_low pov_spm [aw=weight] if o64==0
*Table A4 part 2
reg B2_very_low pov_off parent i.race_4cat i.year i.ppagecat i.educ_4cat [aw=weight] if o64==0
reg B2_very_low pov_anc_off parent i.race_4cat i.year i.ppagecat i.educ_4cat [aw=weight] if o64==0
reg B2_very_low pov_spm_nogeog parent i.race_4cat i.year i.ppagecat i.educ_4cat [aw=weight] if o64==0
reg B2_very_low pov_spm parent i.race_4cat i.year i.ppagecat i.educ_4cat [aw=weight] if o64==0

*Check with year controls only 
reg B2_very_low pov_off i.year [aw=weight] if o64==0
reg B2_very_low pov_anc_off i.year [aw=weight] if o64==0
reg B2_very_low pov_spm_nogeog i.year [aw=weight] if o64==0
reg B2_very_low pov_spm i.year [aw=weight] if o64==0

